{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Cluster Likert Questions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original survey data:\n"
     ]
    },
    {
     "data": {
      "application/vnd.microsoft.datawrangler.viewer.v0+json": {
       "columns": [
        {
         "name": "index",
         "rawType": "int64",
         "type": "integer"
        },
        {
         "name": "respondent_id",
         "rawType": "int64",
         "type": "integer"
        },
        {
         "name": "q1_ease_of_use",
         "rawType": "object",
         "type": "string"
        },
        {
         "name": "q2_product_quality",
         "rawType": "object",
         "type": "string"
        },
        {
         "name": "q3_value_for_money",
         "rawType": "object",
         "type": "string"
        },
        {
         "name": "q4_customer_service",
         "rawType": "object",
         "type": "string"
        },
        {
         "name": "q5_would_recommend",
         "rawType": "object",
         "type": "string"
        },
        {
         "name": "q6_meets_expectations",
         "rawType": "object",
         "type": "string"
        },
        {
         "name": "q7_better_than_competitors",
         "rawType": "object",
         "type": "string"
        },
        {
         "name": "q8_overall_satisfaction",
         "rawType": "object",
         "type": "string"
        }
       ],
       "conversionMethod": "pd.DataFrame",
       "ref": "1e525443-79e3-4d45-87f2-80f1812057d5",
       "rows": [
        [
         "0",
         "1",
         "Agree",
         "Strongly Agree",
         "Strongly Agree",
         "Agree",
         "Strongly Agree",
         "Neither Agree nor Disagree",
         "Neither Agree nor Disagree",
         "Agree"
        ],
        [
         "1",
         "2",
         "Strongly Agree",
         "Strongly Agree",
         "Strongly Agree",
         "Agree",
         "Agree",
         "Strongly Agree",
         "Strongly Agree",
         "Agree"
        ],
        [
         "2",
         "3",
         "Strongly Agree",
         "Neither Agree nor Disagree",
         "Agree",
         "Neither Agree nor Disagree",
         "Strongly Agree",
         "Agree",
         "Strongly Agree",
         "Strongly Agree"
        ],
        [
         "3",
         "4",
         "Agree",
         "Agree",
         "Strongly Agree",
         "Agree",
         "Strongly Agree",
         "Strongly Agree",
         "Strongly Agree",
         "Agree"
        ],
        [
         "4",
         "5",
         "Agree",
         "Strongly Agree",
         "Agree",
         "Agree",
         "Strongly Agree",
         "Agree",
         "Strongly Agree",
         "Agree"
        ]
       ],
       "shape": {
        "columns": 9,
        "rows": 5
       }
      },
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>respondent_id</th>\n",
       "      <th>q1_ease_of_use</th>\n",
       "      <th>q2_product_quality</th>\n",
       "      <th>q3_value_for_money</th>\n",
       "      <th>q4_customer_service</th>\n",
       "      <th>q5_would_recommend</th>\n",
       "      <th>q6_meets_expectations</th>\n",
       "      <th>q7_better_than_competitors</th>\n",
       "      <th>q8_overall_satisfaction</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Agree</td>\n",
       "      <td>Strongly Agree</td>\n",
       "      <td>Strongly Agree</td>\n",
       "      <td>Agree</td>\n",
       "      <td>Strongly Agree</td>\n",
       "      <td>Neither Agree nor Disagree</td>\n",
       "      <td>Neither Agree nor Disagree</td>\n",
       "      <td>Agree</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Strongly Agree</td>\n",
       "      <td>Strongly Agree</td>\n",
       "      <td>Strongly Agree</td>\n",
       "      <td>Agree</td>\n",
       "      <td>Agree</td>\n",
       "      <td>Strongly Agree</td>\n",
       "      <td>Strongly Agree</td>\n",
       "      <td>Agree</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Strongly Agree</td>\n",
       "      <td>Neither Agree nor Disagree</td>\n",
       "      <td>Agree</td>\n",
       "      <td>Neither Agree nor Disagree</td>\n",
       "      <td>Strongly Agree</td>\n",
       "      <td>Agree</td>\n",
       "      <td>Strongly Agree</td>\n",
       "      <td>Strongly Agree</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Agree</td>\n",
       "      <td>Agree</td>\n",
       "      <td>Strongly Agree</td>\n",
       "      <td>Agree</td>\n",
       "      <td>Strongly Agree</td>\n",
       "      <td>Strongly Agree</td>\n",
       "      <td>Strongly Agree</td>\n",
       "      <td>Agree</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Agree</td>\n",
       "      <td>Strongly Agree</td>\n",
       "      <td>Agree</td>\n",
       "      <td>Agree</td>\n",
       "      <td>Strongly Agree</td>\n",
       "      <td>Agree</td>\n",
       "      <td>Strongly Agree</td>\n",
       "      <td>Agree</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   respondent_id  q1_ease_of_use          q2_product_quality  \\\n",
       "0              1           Agree              Strongly Agree   \n",
       "1              2  Strongly Agree              Strongly Agree   \n",
       "2              3  Strongly Agree  Neither Agree nor Disagree   \n",
       "3              4           Agree                       Agree   \n",
       "4              5           Agree              Strongly Agree   \n",
       "\n",
       "  q3_value_for_money         q4_customer_service q5_would_recommend  \\\n",
       "0     Strongly Agree                       Agree     Strongly Agree   \n",
       "1     Strongly Agree                       Agree              Agree   \n",
       "2              Agree  Neither Agree nor Disagree     Strongly Agree   \n",
       "3     Strongly Agree                       Agree     Strongly Agree   \n",
       "4              Agree                       Agree     Strongly Agree   \n",
       "\n",
       "        q6_meets_expectations  q7_better_than_competitors  \\\n",
       "0  Neither Agree nor Disagree  Neither Agree nor Disagree   \n",
       "1              Strongly Agree              Strongly Agree   \n",
       "2                       Agree              Strongly Agree   \n",
       "3              Strongly Agree              Strongly Agree   \n",
       "4                       Agree              Strongly Agree   \n",
       "\n",
       "  q8_overall_satisfaction  \n",
       "0                   Agree  \n",
       "1                   Agree  \n",
       "2          Strongly Agree  \n",
       "3                   Agree  \n",
       "4                   Agree  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 03_cluster_likert_questions.ipynb\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from pandas_survey_toolkit import nlp\n",
    "from pandas_survey_toolkit.vis import cluster_heatmap_plot\n",
    "\n",
    "# Create sample survey data with Likert scale responses.\n",
    "# Simulates a product satisfaction survey with POPULATION respondents and 8 Likert questions.\n",
    "\n",
    "# Define our questions (these become the DataFrame column names)\n",
    "questions = [\n",
    "    'q1_ease_of_use',\n",
    "    'q2_product_quality',\n",
    "    'q3_value_for_money',\n",
    "    'q4_customer_service',\n",
    "    'q5_would_recommend',\n",
    "    'q6_meets_expectations',\n",
    "    'q7_better_than_competitors',\n",
    "    'q8_overall_satisfaction'\n",
    "]\n",
    "\n",
    "# Define our Likert scale options, ordered from most negative to most positive.\n",
    "# The slices used in the sampling loop below rely on this ordering.\n",
    "likert_options = [\n",
    "    'Strongly Disagree',\n",
    "    'Disagree',\n",
    "    'Neither Agree nor Disagree',\n",
    "    'Agree',\n",
    "    'Strongly Agree'\n",
    "]\n",
    "\n",
    "POPULATION = 200  # number of simulated respondents\n",
    "np.random.seed(42)  # fixed seed so the simulated sample is reproducible\n",
    "\n",
    "# Respondent ids are 1-based and run up to POPULATION inclusive. The range is built\n",
    "# once and shared by the id column and the sampling loop so both always have the\n",
    "# same length (the previous range(1, POPULATION) produced only POPULATION - 1 rows).\n",
    "respondent_ids = range(1, POPULATION + 1)\n",
    "data = {'respondent_id': respondent_ids}\n",
    "\n",
    "# Generate random Likert responses with some patterns, keyed on respondent id:\n",
    "# Positive group   (first 30% of ids): neutral / agree / strongly agree\n",
    "# Negative group   (next 30% of ids):  strongly disagree / disagree / neutral\n",
    "# Don't care group (remaining ids):    mostly neutral, occasionally disagree or agree\n",
    "\n",
    "for q in questions:\n",
    "    responses = []\n",
    "    for i in respondent_ids:\n",
    "        if i <= (0.3 * POPULATION):  # Positive group\n",
    "            responses.append(np.random.choice(likert_options[2:], p=[0.1, 0.5, 0.4]))\n",
    "        elif i <= (0.6 * POPULATION):  # Negative group\n",
    "            responses.append(np.random.choice(likert_options[:3], p=[0.3, 0.5, 0.2]))\n",
    "        else:  # Don't care group\n",
    "            responses.append(np.random.choice(likert_options[1:4], p=[0.1, 0.8, 0.1]))\n",
    "    data[q] = responses\n",
    "\n",
    "# Create DataFrame: one row per respondent, one column per question\n",
    "df = pd.DataFrame(data)\n",
    "\n",
    "# Display the original data\n",
    "print(\"Original survey data:\")\n",
    "display(df.head())\n",
    "\n",
    "# Define custom mapping for Likert scale values.\n",
    "# Collapses the 5-point scale to -1 (negative), 0 (neutral), +1 (positive); keys are lower-case.\n",
    "# NOTE(review): the next cell's saved output prints \"Using default mapping\" -\n",
    "# confirm whether this dict is actually passed to the encoder or is unused.\n",
    "custom_mapping = {\n",
    "    'strongly disagree': -1,\n",
    "    'disagree': -1,\n",
    "    'neither agree nor disagree': 0,\n",
    "    'agree': 1,\n",
    "    'strongly agree': 1\n",
    "}\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using default mapping:\n",
      "-1: Phrases containing 'disagree', 'do not agree', etc.\n",
      " 0: Phrases containing 'neutral', 'neither', 'unsure', etc.\n",
      "+1: Phrases containing 'agree' (but not 'disagree' or 'not agree')\n",
      "NaN: NaN values are preserved\n",
      "  Agree -> 1: 282 times\n",
      "  Strongly Agree -> 1: 199 times\n",
      "  Neither Agree nor Disagree -> 0: 668 times\n",
      "  Disagree -> -1: 293 times\n",
      "  Strongly Disagree -> -1: 150 times\n",
      "\n",
      "Encoded Likert data:\n"
     ]
    },
    {
     "data": {
      "application/vnd.microsoft.datawrangler.viewer.v0+json": {
       "columns": [
        {
         "name": "index",
         "rawType": "int64",
         "type": "integer"
        },
        {
         "name": "respondent_id",
         "rawType": "int64",
         "type": "integer"
        },
        {
         "name": "likert_encoded_q1_ease_of_use",
         "rawType": "int64",
         "type": "integer"
        },
        {
         "name": "likert_encoded_q2_product_quality",
         "rawType": "int64",
         "type": "integer"
        },
        {
         "name": "likert_encoded_q3_value_for_money",
         "rawType": "int64",
         "type": "integer"
        },
        {
         "name": "likert_encoded_q4_customer_service",
         "rawType": "int64",
         "type": "integer"
        },
        {
         "name": "likert_encoded_q5_would_recommend",
         "rawType": "int64",
         "type": "integer"
        },
        {
         "name": "likert_encoded_q6_meets_expectations",
         "rawType": "int64",
         "type": "integer"
        },
        {
         "name": "likert_encoded_q7_better_than_competitors",
         "rawType": "int64",
         "type": "integer"
        },
        {
         "name": "likert_encoded_q8_overall_satisfaction",
         "rawType": "int64",
         "type": "integer"
        }
       ],
       "conversionMethod": "pd.DataFrame",
       "ref": "0fb30361-1b2d-4d07-b4f8-f350bffdc906",
       "rows": [
        [
         "0",
         "1",
         "1",
         "1",
         "1",
         "1",
         "1",
         "0",
         "0",
         "1"
        ],
        [
         "1",
         "2",
         "1",
         "1",
         "1",
         "1",
         "1",
         "1",
         "1",
         "1"
        ],
        [
         "2",
         "3",
         "1",
         "0",
         "1",
         "0",
         "1",
         "1",
         "1",
         "1"
        ],
        [
         "3",
         "4",
         "1",
         "1",
         "1",
         "1",
         "1",
         "1",
         "1",
         "1"
        ],
        [
         "4",
         "5",
         "1",
         "1",
         "1",
         "1",
         "1",
         "1",
         "1",
         "1"
        ]
       ],
       "shape": {
        "columns": 9,
        "rows": 5
       }
      },
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>respondent_id</th>\n",
       "      <th>likert_encoded_q1_ease_of_use</th>\n",
       "      <th>likert_encoded_q2_product_quality</th>\n",
       "      <th>likert_encoded_q3_value_for_money</th>\n",
       "      <th>likert_encoded_q4_customer_service</th>\n",
       "      <th>likert_encoded_q5_would_recommend</th>\n",
       "      <th>likert_encoded_q6_meets_expectations</th>\n",
       "      <th>likert_encoded_q7_better_than_competitors</th>\n",
       "      <th>likert_encoded_q8_overall_satisfaction</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   respondent_id  likert_encoded_q1_ease_of_use  \\\n",
       "0              1                              1   \n",
       "1              2                              1   \n",
       "2              3                              1   \n",
       "3              4                              1   \n",
       "4              5                              1   \n",
       "\n",
       "   likert_encoded_q2_product_quality  likert_encoded_q3_value_for_money  \\\n",
       "0                                  1                                  1   \n",
       "1                                  1                                  1   \n",
       "2                                  0                                  1   \n",
       "3                                  1                                  1   \n",
       "4                                  1                                  1   \n",
       "\n",
       "   likert_encoded_q4_customer_service  likert_encoded_q5_would_recommend  \\\n",
       "0                                   1                                  1   \n",
       "1                                   1                                  1   \n",
       "2                                   0                                  1   \n",
       "3                                   1                                  1   \n",
       "4                                   1                                  1   \n",
       "\n",
       "   likert_encoded_q6_meets_expectations  \\\n",
       "0                                     0   \n",
       "1                                     1   \n",
       "2                                     1   \n",
       "3                                     1   \n",
       "4                                     1   \n",
       "\n",
       "   likert_encoded_q7_better_than_competitors  \\\n",
       "0                                          0   \n",
       "1                                          1   \n",
       "2                                          1   \n",
       "3                                          1   \n",
       "4                                          1   \n",
       "\n",
       "   likert_encoded_q8_overall_satisfaction  \n",
       "0                                       1  \n",
       "1                                       1  \n",
       "2                                       1  \n",
       "3                                       1  \n",
       "4                                       1  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Question clustering results:\n"
     ]
    },
    {
     "data": {
      "application/vnd.microsoft.datawrangler.viewer.v0+json": {
       "columns": [
        {
         "name": "index",
         "rawType": "int64",
         "type": "integer"
        },
        {
         "name": "respondent_id",
         "rawType": "int64",
         "type": "integer"
        },
        {
         "name": "question_cluster_id",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "question_cluster_probability",
         "rawType": "float64",
         "type": "float"
        }
       ],
       "conversionMethod": "pd.DataFrame",
       "ref": "254dc5ba-94ba-4684-99a5-a022533a00b8",
       "rows": [
        [
         "0",
         "1",
         "0.0",
         "0.5702343598374491"
        ],
        [
         "1",
         "2",
         "0.0",
         "1.0"
        ],
        [
         "2",
         "3",
         "0.0",
         "0.5627238816993246"
        ],
        [
         "3",
         "4",
         "0.0",
         "1.0"
        ],
        [
         "4",
         "5",
         "0.0",
         "1.0"
        ]
       ],
       "shape": {
        "columns": 3,
        "rows": 5
       }
      },
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>respondent_id</th>\n",
       "      <th>question_cluster_id</th>\n",
       "      <th>question_cluster_probability</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.570234</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.562724</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   respondent_id  question_cluster_id  question_cluster_probability\n",
       "0              1                  0.0                      0.570234\n",
       "1              2                  0.0                      1.000000\n",
       "2              3                  0.0                      0.562724\n",
       "3              4                  0.0                      1.000000\n",
       "4              5                  0.0                      1.000000"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Cluster heatmap showing the sentiment distribution across questions:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "<style>\n",
       "  #altair-viz-18d834e0cbcc4e34a73beb3934be5e6c.vega-embed {\n",
       "    width: 100%;\n",
       "    display: flex;\n",
       "  }\n",
       "\n",
       "  #altair-viz-18d834e0cbcc4e34a73beb3934be5e6c.vega-embed details,\n",
       "  #altair-viz-18d834e0cbcc4e34a73beb3934be5e6c.vega-embed details summary {\n",
       "    position: relative;\n",
       "  }\n",
       "</style>\n",
       "<div id=\"altair-viz-18d834e0cbcc4e34a73beb3934be5e6c\"></div>\n",
       "<script type=\"text/javascript\">\n",
       "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
       "  (function(spec, embedOpt){\n",
       "    let outputDiv = document.currentScript.previousElementSibling;\n",
       "    if (outputDiv.id !== \"altair-viz-18d834e0cbcc4e34a73beb3934be5e6c\") {\n",
       "      outputDiv = document.getElementById(\"altair-viz-18d834e0cbcc4e34a73beb3934be5e6c\");\n",
       "    }\n",
       "    const paths = {\n",
       "      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n",
       "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n",
       "      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.20.1?noext\",\n",
       "      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n",
       "    };\n",
       "\n",
       "    function maybeLoadScript(lib, version) {\n",
       "      var key = `${lib.replace(\"-\", \"\")}_version`;\n",
       "      return (VEGA_DEBUG[key] == version) ?\n",
       "        Promise.resolve(paths[lib]) :\n",
       "        new Promise(function(resolve, reject) {\n",
       "          var s = document.createElement('script');\n",
       "          document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
       "          s.async = true;\n",
       "          s.onload = () => {\n",
       "            VEGA_DEBUG[key] = version;\n",
       "            return resolve(paths[lib]);\n",
       "          };\n",
       "          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
       "          s.src = paths[lib];\n",
       "        });\n",
       "    }\n",
       "\n",
       "    function showError(err) {\n",
       "      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
       "      throw err;\n",
       "    }\n",
       "\n",
       "    function displayChart(vegaEmbed) {\n",
       "      vegaEmbed(outputDiv, spec, embedOpt)\n",
       "        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
       "    }\n",
       "\n",
       "    if(typeof define === \"function\" && define.amd) {\n",
       "      requirejs.config({paths});\n",
       "      require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
       "    } else {\n",
       "      maybeLoadScript(\"vega\", \"5\")\n",
       "        .then(() => maybeLoadScript(\"vega-lite\", \"5.20.1\"))\n",
       "        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
       "        .catch(showError)\n",
       "        .then(() => displayChart(vegaEmbed));\n",
       "    }\n",
       "  })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300, \"strokeWidth\": 0}, \"axis\": {\"labelLimit\": 350}}, \"vconcat\": [{\"layer\": [{\"mark\": {\"type\": \"bar\"}, \"encoding\": {\"tooltip\": [{\"field\": \"question_cluster_id\", \"title\": \"Cluster ID\", \"type\": \"ordinal\"}, {\"field\": \"count\", \"title\": \"Count\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"question_cluster_id\", \"sort\": [0.0, 1.0, 3.0, 2.0], \"title\": \"Cluster ID\", \"type\": \"ordinal\"}, \"y\": {\"field\": \"count\", \"title\": \"Count\", \"type\": \"quantitative\"}}, \"title\": \"Cluster Sizes\"}, {\"mark\": {\"type\": \"text\", \"align\": \"center\", \"baseline\": \"bottom\", \"dy\": -5}, \"encoding\": {\"text\": {\"field\": \"count\", \"type\": \"quantitative\"}, \"tooltip\": [{\"field\": \"question_cluster_id\", \"title\": \"Cluster ID\", \"type\": \"ordinal\"}, {\"field\": \"count\", \"title\": \"Count\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"question_cluster_id\", \"sort\": [0.0, 1.0, 3.0, 2.0], \"title\": \"Cluster ID\", \"type\": \"ordinal\"}, \"y\": {\"field\": \"count\", \"title\": \"Count\", \"type\": \"quantitative\"}}, \"title\": \"Cluster Sizes\"}], \"data\": {\"name\": \"data-d49e285143f6db9c1ca5cf4123ff8a93\"}, \"height\": 100, \"width\": 600}, {\"layer\": [{\"mark\": {\"type\": \"rect\"}, \"encoding\": {\"color\": {\"field\": \"background_color\", \"scale\": null, \"type\": \"nominal\"}, \"tooltip\": [{\"field\": \"question_cluster_id\", \"title\": \"Cluster ID\", \"type\": \"ordinal\"}, {\"field\": \"question\", \"title\": \"Question\", \"type\": \"ordinal\"}, {\"field\": \"percent_positive\", \"format\": \".2%\", \"title\": \"% Positive\", \"type\": \"quantitative\"}, {\"field\": \"percent_negative\", \"format\": \".2%\", \"title\": \"% Negative\", \"type\": \"quantitative\"}, {\"field\": \"percent_neutral\", \"format\": \".2%\", \"title\": \"% Neutral\", \"type\": \"quantitative\"}], \"x\": {\"field\": 
\"question_cluster_id\", \"sort\": [0.0, 1.0, 3.0, 2.0], \"title\": \"Cluster ID\", \"type\": \"ordinal\"}, \"y\": {\"field\": \"wrapped_question\", \"sort\": [\" q1 ease of use\", \" q2 product quality\", \" q3 value for money\", \" q4 customer service\", \" q5 would recommend\", \" q6 meets expectations\", \" q7 better than competitors\", \" q8 overall satisfaction\"], \"title\": null, \"type\": \"ordinal\"}}, \"title\": \"Cluster Heatmap: Sentiment Distribution\"}, {\"mark\": {\"type\": \"text\", \"baseline\": \"middle\"}, \"encoding\": {\"color\": {\"field\": \"text_color\", \"scale\": null, \"type\": \"nominal\"}, \"text\": {\"field\": \"percent_positive\", \"format\": \".0%\", \"type\": \"quantitative\"}, \"tooltip\": [{\"field\": \"question_cluster_id\", \"title\": \"Cluster ID\", \"type\": \"ordinal\"}, {\"field\": \"question\", \"title\": \"Question\", \"type\": \"ordinal\"}, {\"field\": \"percent_positive\", \"format\": \".2%\", \"title\": \"% Positive\", \"type\": \"quantitative\"}, {\"field\": \"percent_negative\", \"format\": \".2%\", \"title\": \"% Negative\", \"type\": \"quantitative\"}, {\"field\": \"percent_neutral\", \"format\": \".2%\", \"title\": \"% Neutral\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"question_cluster_id\", \"sort\": [0.0, 1.0, 3.0, 2.0], \"title\": \"Cluster ID\", \"type\": \"ordinal\"}, \"y\": {\"field\": \"wrapped_question\", \"sort\": [\" q1 ease of use\", \" q2 product quality\", \" q3 value for money\", \" q4 customer service\", \" q5 would recommend\", \" q6 meets expectations\", \" q7 better than competitors\", \" q8 overall satisfaction\"], \"title\": null, \"type\": \"ordinal\"}}, \"title\": \"Cluster Heatmap: Sentiment Distribution\"}], \"data\": {\"name\": \"data-c0689938aa7f7f025a4e6d6ce4e10aaa\"}, \"height\": 240, \"width\": 600}], \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.20.1.json\", \"datasets\": {\"data-d49e285143f6db9c1ca5cf4123ff8a93\": [{\"question_cluster_id\": 0.0, \"count\": 61}, 
{\"question_cluster_id\": 1.0, \"count\": 49}, {\"question_cluster_id\": 3.0, \"count\": 72}, {\"question_cluster_id\": 2.0, \"count\": 17}], \"data-c0689938aa7f7f025a4e6d6ce4e10aaa\": [{\"question_cluster_id\": 0.0, \"question\": \" q1 ease of use\", \"percent_positive\": 0.8524590163934426, \"percent_negative\": 0.0, \"percent_neutral\": 0.14754098360655743, \"wrapped_question\": \" q1 ease of use\", \"background_color\": \"#1a9641\", \"text_color\": \"white\"}, {\"question_cluster_id\": 1.0, \"question\": \" q1 ease of use\", \"percent_positive\": 0.14285714285714285, \"percent_negative\": 0.10204081632653061, \"percent_neutral\": 0.7551020408163266, \"wrapped_question\": \" q1 ease of use\", \"background_color\": \"#ffffbf\", \"text_color\": \"black\"}, {\"question_cluster_id\": 2.0, \"question\": \" q1 ease of use\", \"percent_positive\": 0.0, \"percent_negative\": 0.0, \"percent_neutral\": 1.0, \"wrapped_question\": \" q1 ease of use\", \"background_color\": \"#f7f7f7\", \"text_color\": \"black\"}, {\"question_cluster_id\": 3.0, \"question\": \" q1 ease of use\", \"percent_positive\": 0.013888888888888888, \"percent_negative\": 0.6944444444444444, \"percent_neutral\": 0.29166666666666674, \"wrapped_question\": \" q1 ease of use\", \"background_color\": \"#d7191c\", \"text_color\": \"white\"}, {\"question_cluster_id\": 0.0, \"question\": \" q2 product quality\", \"percent_positive\": 0.8852459016393442, \"percent_negative\": 0.0, \"percent_neutral\": 0.11475409836065575, \"wrapped_question\": \" q2 product quality\", \"background_color\": \"#1a9641\", \"text_color\": \"white\"}, {\"question_cluster_id\": 1.0, \"question\": \" q2 product quality\", \"percent_positive\": 0.12244897959183673, \"percent_negative\": 0.20408163265306123, \"percent_neutral\": 0.673469387755102, \"wrapped_question\": \" q2 product quality\", \"background_color\": \"#f4a582\", \"text_color\": \"black\"}, {\"question_cluster_id\": 2.0, \"question\": \" q2 product quality\", 
\"percent_positive\": 0.0, \"percent_negative\": 0.0, \"percent_neutral\": 1.0, \"wrapped_question\": \" q2 product quality\", \"background_color\": \"#f7f7f7\", \"text_color\": \"black\"}, {\"question_cluster_id\": 3.0, \"question\": \" q2 product quality\", \"percent_positive\": 0.013888888888888888, \"percent_negative\": 0.7083333333333334, \"percent_neutral\": 0.2777777777777778, \"wrapped_question\": \" q2 product quality\", \"background_color\": \"#d7191c\", \"text_color\": \"white\"}, {\"question_cluster_id\": 0.0, \"question\": \" q3 value for money\", \"percent_positive\": 0.819672131147541, \"percent_negative\": 0.0, \"percent_neutral\": 0.180327868852459, \"wrapped_question\": \" q3 value for money\", \"background_color\": \"#1a9641\", \"text_color\": \"white\"}, {\"question_cluster_id\": 1.0, \"question\": \" q3 value for money\", \"percent_positive\": 0.20408163265306123, \"percent_negative\": 0.08163265306122448, \"percent_neutral\": 0.7142857142857143, \"wrapped_question\": \" q3 value for money\", \"background_color\": \"#ffffbf\", \"text_color\": \"black\"}, {\"question_cluster_id\": 2.0, \"question\": \" q3 value for money\", \"percent_positive\": 0.0, \"percent_negative\": 0.0, \"percent_neutral\": 1.0, \"wrapped_question\": \" q3 value for money\", \"background_color\": \"#f7f7f7\", \"text_color\": \"black\"}, {\"question_cluster_id\": 3.0, \"question\": \" q3 value for money\", \"percent_positive\": 0.0, \"percent_negative\": 0.5694444444444444, \"percent_neutral\": 0.4305555555555556, \"wrapped_question\": \" q3 value for money\", \"background_color\": \"#fdae61\", \"text_color\": \"black\"}, {\"question_cluster_id\": 0.0, \"question\": \" q4 customer service\", \"percent_positive\": 0.8688524590163934, \"percent_negative\": 0.0, \"percent_neutral\": 0.1311475409836066, \"wrapped_question\": \" q4 customer service\", \"background_color\": \"#1a9641\", \"text_color\": \"white\"}, {\"question_cluster_id\": 1.0, \"question\": \" q4 customer 
service\", \"percent_positive\": 0.10204081632653061, \"percent_negative\": 0.02040816326530612, \"percent_neutral\": 0.8775510204081632, \"wrapped_question\": \" q4 customer service\", \"background_color\": \"#ffffbf\", \"text_color\": \"black\"}, {\"question_cluster_id\": 2.0, \"question\": \" q4 customer service\", \"percent_positive\": 0.0, \"percent_negative\": 0.0, \"percent_neutral\": 1.0, \"wrapped_question\": \" q4 customer service\", \"background_color\": \"#f7f7f7\", \"text_color\": \"black\"}, {\"question_cluster_id\": 3.0, \"question\": \" q4 customer service\", \"percent_positive\": 0.0, \"percent_negative\": 0.7083333333333334, \"percent_neutral\": 0.29166666666666663, \"wrapped_question\": \" q4 customer service\", \"background_color\": \"#d7191c\", \"text_color\": \"white\"}, {\"question_cluster_id\": 0.0, \"question\": \" q5 would recommend\", \"percent_positive\": 0.9180327868852459, \"percent_negative\": 0.0, \"percent_neutral\": 0.08196721311475408, \"wrapped_question\": \" q5 would recommend\", \"background_color\": \"#1a9641\", \"text_color\": \"white\"}, {\"question_cluster_id\": 1.0, \"question\": \" q5 would recommend\", \"percent_positive\": 0.10204081632653061, \"percent_negative\": 0.02040816326530612, \"percent_neutral\": 0.8775510204081632, \"wrapped_question\": \" q5 would recommend\", \"background_color\": \"#ffffbf\", \"text_color\": \"black\"}, {\"question_cluster_id\": 2.0, \"question\": \" q5 would recommend\", \"percent_positive\": 0.0, \"percent_negative\": 0.0, \"percent_neutral\": 1.0, \"wrapped_question\": \" q5 would recommend\", \"background_color\": \"#f7f7f7\", \"text_color\": \"black\"}, {\"question_cluster_id\": 3.0, \"question\": \" q5 would recommend\", \"percent_positive\": 0.0, \"percent_negative\": 0.8333333333333334, \"percent_neutral\": 0.16666666666666663, \"wrapped_question\": \" q5 would recommend\", \"background_color\": \"#d7191c\", \"text_color\": \"white\"}, {\"question_cluster_id\": 0.0, \"question\": 
\" q6 meets expectations\", \"percent_positive\": 0.8524590163934426, \"percent_negative\": 0.0, \"percent_neutral\": 0.14754098360655743, \"wrapped_question\": \" q6 meets expectations\", \"background_color\": \"#1a9641\", \"text_color\": \"white\"}, {\"question_cluster_id\": 1.0, \"question\": \" q6 meets expectations\", \"percent_positive\": 0.10204081632653061, \"percent_negative\": 0.14285714285714285, \"percent_neutral\": 0.7551020408163265, \"wrapped_question\": \" q6 meets expectations\", \"background_color\": \"#f4a582\", \"text_color\": \"black\"}, {\"question_cluster_id\": 2.0, \"question\": \" q6 meets expectations\", \"percent_positive\": 0.0, \"percent_negative\": 0.0, \"percent_neutral\": 1.0, \"wrapped_question\": \" q6 meets expectations\", \"background_color\": \"#f7f7f7\", \"text_color\": \"black\"}, {\"question_cluster_id\": 3.0, \"question\": \" q6 meets expectations\", \"percent_positive\": 0.0, \"percent_negative\": 0.6666666666666666, \"percent_neutral\": 0.33333333333333337, \"wrapped_question\": \" q6 meets expectations\", \"background_color\": \"#d7191c\", \"text_color\": \"white\"}, {\"question_cluster_id\": 0.0, \"question\": \" q7 better than competitors\", \"percent_positive\": 0.8852459016393442, \"percent_negative\": 0.0, \"percent_neutral\": 0.11475409836065575, \"wrapped_question\": \" q7 better than competitors\", \"background_color\": \"#1a9641\", \"text_color\": \"white\"}, {\"question_cluster_id\": 1.0, \"question\": \" q7 better than competitors\", \"percent_positive\": 0.20408163265306123, \"percent_negative\": 0.02040816326530612, \"percent_neutral\": 0.7755102040816326, \"wrapped_question\": \" q7 better than competitors\", \"background_color\": \"#ffffbf\", \"text_color\": \"black\"}, {\"question_cluster_id\": 2.0, \"question\": \" q7 better than competitors\", \"percent_positive\": 0.0, \"percent_negative\": 0.0, \"percent_neutral\": 1.0, \"wrapped_question\": \" q7 better than competitors\", \"background_color\": 
\"#f7f7f7\", \"text_color\": \"black\"}, {\"question_cluster_id\": 3.0, \"question\": \" q7 better than competitors\", \"percent_positive\": 0.0, \"percent_negative\": 0.7777777777777778, \"percent_neutral\": 0.2222222222222222, \"wrapped_question\": \" q7 better than competitors\", \"background_color\": \"#d7191c\", \"text_color\": \"white\"}, {\"question_cluster_id\": 0.0, \"question\": \" q8 overall satisfaction\", \"percent_positive\": 0.8852459016393442, \"percent_negative\": 0.0, \"percent_neutral\": 0.11475409836065575, \"wrapped_question\": \" q8 overall satisfaction\", \"background_color\": \"#1a9641\", \"text_color\": \"white\"}, {\"question_cluster_id\": 1.0, \"question\": \" q8 overall satisfaction\", \"percent_positive\": 0.12244897959183673, \"percent_negative\": 0.12244897959183673, \"percent_neutral\": 0.7551020408163265, \"wrapped_question\": \" q8 overall satisfaction\", \"background_color\": \"#f7f7f7\", \"text_color\": \"black\"}, {\"question_cluster_id\": 2.0, \"question\": \" q8 overall satisfaction\", \"percent_positive\": 0.0, \"percent_negative\": 0.0, \"percent_neutral\": 1.0, \"wrapped_question\": \" q8 overall satisfaction\", \"background_color\": \"#f7f7f7\", \"text_color\": \"black\"}, {\"question_cluster_id\": 3.0, \"question\": \" q8 overall satisfaction\", \"percent_positive\": 0.0, \"percent_negative\": 0.7083333333333334, \"percent_neutral\": 0.29166666666666663, \"wrapped_question\": \" q8 overall satisfaction\", \"background_color\": \"#d7191c\", \"text_color\": \"white\"}]}}, {\"mode\": \"vega-lite\"});\n",
       "</script>"
      ],
      "text/plain": [
       "alt.VConcatChart(...)"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Cluster averages for each question:\n"
     ]
    },
    {
     "data": {
      "application/vnd.microsoft.datawrangler.viewer.v0+json": {
       "columns": [
        {
         "name": "question_cluster_id",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "likert_encoded_q1_ease_of_use",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "likert_encoded_q2_product_quality",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "likert_encoded_q3_value_for_money",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "likert_encoded_q4_customer_service",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "likert_encoded_q5_would_recommend",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "likert_encoded_q6_meets_expectations",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "likert_encoded_q7_better_than_competitors",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "likert_encoded_q8_overall_satisfaction",
         "rawType": "float64",
         "type": "float"
        }
       ],
       "conversionMethod": "pd.DataFrame",
       "ref": "8df84408-1730-41d7-b372-6aca72ddf886",
       "rows": [
        [
         "0.0",
         "0.8524590163934426",
         "0.8852459016393442",
         "0.819672131147541",
         "0.8688524590163934",
         "0.9180327868852459",
         "0.8524590163934426",
         "0.8852459016393442",
         "0.8852459016393442"
        ],
        [
         "1.0",
         "0.04081632653061224",
         "-0.08163265306122448",
         "0.12244897959183673",
         "0.08163265306122448",
         "0.08163265306122448",
         "-0.04081632653061224",
         "0.1836734693877551",
         "0.0"
        ],
        [
         "2.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0",
         "0.0"
        ],
        [
         "3.0",
         "-0.6805555555555556",
         "-0.6944444444444444",
         "-0.5694444444444444",
         "-0.7083333333333334",
         "-0.8333333333333334",
         "-0.6666666666666666",
         "-0.7777777777777778",
         "-0.7083333333333334"
        ]
       ],
       "shape": {
        "columns": 8,
        "rows": 4
       }
      },
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>likert_encoded_q1_ease_of_use</th>\n",
       "      <th>likert_encoded_q2_product_quality</th>\n",
       "      <th>likert_encoded_q3_value_for_money</th>\n",
       "      <th>likert_encoded_q4_customer_service</th>\n",
       "      <th>likert_encoded_q5_would_recommend</th>\n",
       "      <th>likert_encoded_q6_meets_expectations</th>\n",
       "      <th>likert_encoded_q7_better_than_competitors</th>\n",
       "      <th>likert_encoded_q8_overall_satisfaction</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>question_cluster_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0.0</th>\n",
       "      <td>0.852459</td>\n",
       "      <td>0.885246</td>\n",
       "      <td>0.819672</td>\n",
       "      <td>0.868852</td>\n",
       "      <td>0.918033</td>\n",
       "      <td>0.852459</td>\n",
       "      <td>0.885246</td>\n",
       "      <td>0.885246</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1.0</th>\n",
       "      <td>0.040816</td>\n",
       "      <td>-0.081633</td>\n",
       "      <td>0.122449</td>\n",
       "      <td>0.081633</td>\n",
       "      <td>0.081633</td>\n",
       "      <td>-0.040816</td>\n",
       "      <td>0.183673</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2.0</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3.0</th>\n",
       "      <td>-0.680556</td>\n",
       "      <td>-0.694444</td>\n",
       "      <td>-0.569444</td>\n",
       "      <td>-0.708333</td>\n",
       "      <td>-0.833333</td>\n",
       "      <td>-0.666667</td>\n",
       "      <td>-0.777778</td>\n",
       "      <td>-0.708333</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     likert_encoded_q1_ease_of_use  \\\n",
       "question_cluster_id                                  \n",
       "0.0                                       0.852459   \n",
       "1.0                                       0.040816   \n",
       "2.0                                       0.000000   \n",
       "3.0                                      -0.680556   \n",
       "\n",
       "                     likert_encoded_q2_product_quality  \\\n",
       "question_cluster_id                                      \n",
       "0.0                                           0.885246   \n",
       "1.0                                          -0.081633   \n",
       "2.0                                           0.000000   \n",
       "3.0                                          -0.694444   \n",
       "\n",
       "                     likert_encoded_q3_value_for_money  \\\n",
       "question_cluster_id                                      \n",
       "0.0                                           0.819672   \n",
       "1.0                                           0.122449   \n",
       "2.0                                           0.000000   \n",
       "3.0                                          -0.569444   \n",
       "\n",
       "                     likert_encoded_q4_customer_service  \\\n",
       "question_cluster_id                                       \n",
       "0.0                                            0.868852   \n",
       "1.0                                            0.081633   \n",
       "2.0                                            0.000000   \n",
       "3.0                                           -0.708333   \n",
       "\n",
       "                     likert_encoded_q5_would_recommend  \\\n",
       "question_cluster_id                                      \n",
       "0.0                                           0.918033   \n",
       "1.0                                           0.081633   \n",
       "2.0                                           0.000000   \n",
       "3.0                                          -0.833333   \n",
       "\n",
       "                     likert_encoded_q6_meets_expectations  \\\n",
       "question_cluster_id                                         \n",
       "0.0                                              0.852459   \n",
       "1.0                                             -0.040816   \n",
       "2.0                                              0.000000   \n",
       "3.0                                             -0.666667   \n",
       "\n",
       "                     likert_encoded_q7_better_than_competitors  \\\n",
       "question_cluster_id                                              \n",
       "0.0                                                   0.885246   \n",
       "1.0                                                   0.183673   \n",
       "2.0                                                   0.000000   \n",
       "3.0                                                  -0.777778   \n",
       "\n",
       "                     likert_encoded_q8_overall_satisfaction  \n",
       "question_cluster_id                                          \n",
       "0.0                                                0.885246  \n",
       "1.0                                                0.000000  \n",
       "2.0                                                0.000000  \n",
       "3.0                                               -0.708333  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Number of respondents in each cluster:\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "question_cluster_id\n",
       "0.0    61\n",
       "1.0    49\n",
       "2.0    17\n",
       "3.0    72\n",
       "Name: count, dtype: int64"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Run the question clustering on the survey frame. The displays further down\n",
    "# rely on the result carrying respondent_id, the likert_encoded_* columns,\n",
    "# question_cluster_id and question_cluster_probability.\n",
    "df_processed = df.cluster_questions(\n",
    "    columns=questions,\n",
    "    # likert_mapping is left unset; per the original note the default handles most cases\n",
    "    umap_n_neighbors=15,\n",
    "    hdbscan_min_cluster_size=15,\n",
    "    cluster_selection_epsilon=0.35,\n",
    ")\n",
    "\n",
    "# One encoded column name per question, e.g. likert_encoded_q1_ease_of_use\n",
    "likert_columns_with_prefix = list(map(\"likert_encoded_{}\".format, questions))\n",
    "\n",
    "# Preview the encoded answers next to the respondent id\n",
    "print(\"\\nEncoded Likert data:\")\n",
    "display(df_processed[[\"respondent_id\", *likert_columns_with_prefix]].head())\n",
    "\n",
    "# Preview each respondent's cluster assignment and its probability\n",
    "print(\"\\nQuestion clustering results:\")\n",
    "display(\n",
    "    df_processed[\n",
    "        [\"respondent_id\", \"question_cluster_id\", \"question_cluster_probability\"]\n",
    "    ].head()\n",
    ")\n",
    "\n",
    "# Heatmap of the encoded questions against the cluster ids\n",
    "print(\"\\nCluster heatmap showing the sentiment distribution across questions:\")\n",
    "heatmap = cluster_heatmap_plot(\n",
    "    df=df_processed,\n",
    "    x=\"question_cluster_id\",  # cluster ids on the x-axis\n",
    "    y=likert_columns_with_prefix,  # encoded Likert columns to analyze\n",
    "    max_width=30,  # presumably the wrap width for question labels - confirm\n",
    ")\n",
    "display(heatmap)\n",
    "\n",
    "# NOTE(review): the saved output shows question_cluster_id as float64. If that\n",
    "# comes from NaN (unassigned) rows, groupby() and value_counts() below leave\n",
    "# those rows out by default - confirm this is intended.\n",
    "\n",
    "# Mean encoded score per question within each cluster\n",
    "cluster_summary = (\n",
    "    df_processed\n",
    "    .groupby(\"question_cluster_id\")[likert_columns_with_prefix]\n",
    "    .mean()\n",
    ")\n",
    "print(\"\\nCluster averages for each question:\")\n",
    "display(cluster_summary)\n",
    "\n",
    "# Respondent count per cluster, ordered by cluster id\n",
    "cluster_counts = (\n",
    "    df_processed[\"question_cluster_id\"]\n",
    "    .value_counts()\n",
    "    .sort_index()\n",
    ")\n",
    "print(\"\\nNumber of respondents in each cluster:\")\n",
    "display(cluster_counts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}